library(tidyverse)
library(readxl)
library(rdrobust)
library(gridExtra)
library(grid)

load("data/shijifabu.RData")
load("data/shengjifabu.RData")

# Grid mobilization during the "dynamic zero-COVID" period
test_list <- read_xlsx("data/sporadic_test.xlsx") |>
  mutate(First_Infected = as.Date(First_Infected))

# Local mobilization
process_data_local <- function(test_list, shijifabu) {
  all_results <- data.frame() 
  
  for (i in 1:nrow(test_list)) {
    First_Infected <- as.Date(test_list$First_Infected[i])
    
    City <- test_list$City[i]
    Scale <- test_list$Scale[i]
    Regional_spreading <- test_list$Regional_spreading[i]
    National_spreading <- test_list$National_spreading[i]
    
    start_date <- First_Infected %m-% months(12)
    end_date <- First_Infected %m+% months(12)
    
    City_filter <- test_list$City[i]
    
    rd_data <- shijifabu |>
      filter(City %in% City_filter) |>
      filter(date >= start_date & date <= end_date)
    
    rd_data <- rd_data |>
      mutate(difference = as.integer(date - First_Infected),
             others = total_count - daily_grid)
    
    daily_total_mean <- mean(rd_data$total_count, na.rm = TRUE)
    
    rd_control <- rdrobust(rd_data$daily_grid,
                           rd_data$difference,
                           covs = rd_data$others,
                           p = 1,
                           kernel = "triangular",
                           bwselect = "mserd")
    
    conventional_coef <- rd_control[["coef"]]["Conventional", "Coeff"] / daily_total_mean
    conventional_ci_lower <- rd_control[["ci"]]["Conventional", "CI Lower"] / daily_total_mean
    conventional_ci_upper <- rd_control[["ci"]]["Conventional", "CI Upper"] / daily_total_mean
    
    rd_results <- data.frame(
      City = City,
      Scale = Scale,
      Regional_spreading = Regional_spreading,
      National_spreading = National_spreading,
      First_Infected = First_Infected,
      Coefficient = conventional_coef,
      CI_Lower = conventional_ci_lower,
      CI_Upper = conventional_ci_upper
    )
    
    all_results <- bind_rows(all_results, rd_results)
  }
  
  return(all_results)
}

local_results_daily <- process_data_local(test_list, shijifabu)

print(local_results_daily)

save(local_results_daily, file = "generated_data/dynamic_RDD_results/local_results_daily.RData")

# Regional mobilization (daily, no lag)
process_data_regional <- function(test_list, shengjifabu) {
  all_results <- data.frame() 
  
  for (i in 1:nrow(test_list)) {
    First_Infected <- as.Date(test_list$First_Infected[i])
    
    City <- test_list$City[i]
    Scale <- test_list$Scale[i]
    Regional_spreading <- test_list$Regional_spreading[i]
    National_spreading <- test_list$National_spreading[i]
    
    start_date <- First_Infected %m-% months(12)
    end_date <- First_Infected %m+% months(12)
    
    peripheral_filter <- unlist(strsplit(test_list$Peripheral[i], ","))
    
    rd_data <- shengjifabu |>
      filter(province %in% peripheral_filter) |>
      filter(date >= start_date & date <= end_date)
    
    rd_data$difference <- as.integer(rd_data$date - First_Infected)
    rd_data$week <- NA
    
    rd_data$others <- rd_data$total_count - rd_data$daily_grid
    
    daily_total_mean <- mean(rd_data$total_count, na.rm = TRUE)
    
    rd_control <- rdrobust(rd_data$daily_grid,
                           rd_data$difference,
                           covs = rd_data$others,
                           p = 1,
                           kernel = "triangular",
                           bwselect = "mserd",
                           c = 0)
    
    conventional_coef <- rd_control[["coef"]]["Conventional", "Coeff"] / daily_total_mean
    conventional_ci_lower <- rd_control[["ci"]]["Conventional", "CI Lower"] / daily_total_mean
    conventional_ci_upper <- rd_control[["ci"]]["Conventional", "CI Upper"] / daily_total_mean
    
    rd_results <- data.frame(
      City = City,
      Scale = Scale,
      Regional_spreading = Regional_spreading,
      National_spreading = National_spreading,
      First_Infected = First_Infected,
      Coefficient = conventional_coef,
      CI_Lower = conventional_ci_lower,
      CI_Upper = conventional_ci_upper
    )
    
    all_results <- bind_rows(all_results, rd_results)
  }
  
  return(all_results)
}

regional_results_daily <- process_data_regional(test_list, shengjifabu)

print(regional_results_daily)

save(regional_results_daily, file = "generated_data/dynamic_RDD_results/regional_results_daily.RData")

# National mobilization (daily, no lag)
process_data_national <- function(test_list, shengjifabu) {
  all_results <- vector("list", nrow(test_list))
  
  for (i in seq_len(nrow(test_list))) {
    First_Infected <- as.Date(test_list$First_Infected[i])
    City <- test_list$City[i]
    Scale <- test_list$Scale[i]
    Regional_spreading <- test_list$Regional_spreading[i]
    National_spreading <- test_list$National_spreading[i]
    
    start_date <- First_Infected %m-% months(12)
    end_date   <- First_Infected %m+% months(12)
    
    national_raw <- test_list$National_level[i]
    national <- if (is.na(national_raw) || national_raw == "") character(0) else {
      trimws(unlist(strsplit(national_raw, ",")))
    }
    
    rd_data <- shengjifabu |>
      mutate(date = as.Date(date)) |>
      filter(!(province %in% national)) |>
      filter(date >= start_date & date <= end_date) |>
      group_by(date) |>
      summarise(
        total_count = sum(total_count, na.rm = TRUE),
        daily_grid  = sum(daily_grid,  na.rm = TRUE),
        .groups = "drop"
      ) |>
      complete(date = seq.Date(start_date, end_date, by = "day"),
               fill = list(total_count = 0L, daily_grid = 0L)) |>
      arrange(date) |>
      mutate(
        difference = as.integer(date - First_Infected),
        others     = pmax(total_count - daily_grid, 0L) 
      )
    
    daily_total_mean <- mean(rd_data$total_count, na.rm = TRUE)
    
    if (is.na(daily_total_mean) || daily_total_mean == 0) {
      coef <- NA_real_; ci_l <- NA_real_; ci_u <- NA_real_
    } else {
      rd_control <- rdrobust(
        rd_data$daily_grid,
        rd_data$difference,
        covs = rd_data$others,
        p = 1,
        kernel = "triangular",
        bwselect = "mserd",
        c = 0
      )
      coef <- rd_control[["coef"]]["Conventional", "Coeff"]      / daily_total_mean
      ci_l <- rd_control[["ci"]]["Conventional", "CI Lower"]     / daily_total_mean
      ci_u <- rd_control[["ci"]]["Conventional", "CI Upper"]     / daily_total_mean
    }
    
    all_results[[i]] <- data.frame(
      City = City,
      Scale = Scale,
      Regional_spreading = Regional_spreading,
      National_spreading = National_spreading,
      First_Infected = First_Infected,
      Coefficient = coef,
      CI_Lower = ci_l,
      CI_Upper = ci_u,
      stringsAsFactors = FALSE
    )
  }
  
  dplyr::bind_rows(all_results)
}

national_results_daily <- process_data_national(test_list, shengjifabu)

print(national_results_daily)

save(national_results_daily, file = "generated_data/dynamic_RDD_results/national_results_daily.RData")

# Plot local mobilization (daily, no lag)
local_results_daily <- local_results_daily |>
  arrange(desc(First_Infected)) |>
  mutate(Order = row_number())

local_results_daily$First_Infected <- format(as.Date(local_results_daily$First_Infected), "%d %b, %Y")

local_results_daily <- local_results_daily |>
  mutate(Significance = ifelse(CI_Lower > 0 & CI_Upper > 0, 1, 0))

local_mobilization <- ggplot(local_results_daily, aes(x = Order, y = Coefficient)) +
  geom_point(aes(size = Scale * 2, color = as.factor(Significance), 
                 fill = as.factor(Significance), shape = as.factor(Regional_spreading)), 
             stroke = 2) +
  geom_errorbar(aes(ymin = CI_Lower, ymax = CI_Upper, color = as.factor(Significance)), 
                width = 0.4, linewidth = 0.8, linetype = "solid") +
  geom_hline(yintercept = 0, color = "#0033FF", linewidth = 1, linetype = "dashed") +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("0" = "lightgray", "1" = "black")) +
  scale_fill_manual(values = c("0" = "lightgray", "1" = "black")) + 
  scale_shape_manual(values = c("0" = 1, "1" = 16)) +  
  scale_x_continuous(breaks = local_results_daily$Order, labels = local_results_daily$First_Infected) +
  labs(title = "",
       x = "",
       y = "Local Mobilization") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 18),
        # Axis labels intentionally hidden for a cleaner facet layout
        axis.text.y = element_text(size = 16),
        axis.text.x = element_blank(),  
        axis.ticks.x = element_blank(),  
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)) +
  coord_flip()

print(local_mobilization)

# Plot regional mobilization (daily)
regional_results_daily <- regional_results_daily |>
  arrange(desc(First_Infected)) |>
  mutate(Order = row_number())

regional_results_daily$First_Infected <- format(as.Date(regional_results_daily$First_Infected), "%d %b, %Y")

regional_results_daily <- regional_results_daily |>
  mutate(Significance = ifelse(CI_Lower > 0 & CI_Upper > 0, 1, 0))

regional_mobilization <- ggplot(regional_results_daily, aes(x = Order, y = Coefficient)) +
  geom_point(aes(size = Scale * 2, color = as.factor(Significance), 
                 fill = as.factor(Significance), shape = as.factor(Regional_spreading)), 
             stroke = 2) +
  geom_errorbar(aes(ymin = CI_Lower, ymax = CI_Upper, color = as.factor(Significance)), 
                width = 0.4, linewidth = 0.8, linetype = "solid") +
  geom_hline(yintercept = 0, color = "#0033FF", linewidth = 1, linetype = "dashed") +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("0" = "lightgray", "1" = "black")) +
  scale_fill_manual(values = c("0" = "lightgray", "1" = "black")) + 
  scale_shape_manual(values = c("0" = 1, "1" = 16)) +  
  scale_x_continuous(breaks = regional_results_daily$Order, labels = regional_results_daily$City) +
  labs(title = "",
       x = "",
       y = "Regional Mobilization") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 18),
        # Axis labels intentionally hidden for a cleaner facet layout
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.x = element_blank(),  
        axis.ticks.x = element_blank(),  
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)) +
  coord_flip()

print(regional_mobilization)

# Plot national mobilization (daily, no lag)
national_results_daily <- national_results_daily |>
  arrange(desc(First_Infected)) |>
  mutate(Order = row_number())

national_results_daily$First_Infected <- format(as.Date(national_results_daily$First_Infected), "%d %b, %Y")

national_results_daily <- national_results_daily |>
  mutate(Significance = ifelse(CI_Lower > 0 & CI_Upper > 0, 1, 0))

national_mobilization <- ggplot(national_results_daily, aes(x = Order, y = Coefficient)) +
  geom_point(aes(size = Scale * 2, color = as.factor(Significance), 
                 fill = as.factor(Significance), shape = as.factor(Regional_spreading)), 
             stroke = 2) +
  geom_errorbar(aes(ymin = CI_Lower, ymax = CI_Upper, color = as.factor(Significance)), 
                width = 0.4, linewidth = 0.8, linetype = "solid") +
  geom_hline(yintercept = 0, color = "#0033FF", linewidth = 1, linetype = "dashed") +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("0" = "lightgray", "1" = "black")) +
  scale_fill_manual(values = c("0" = "lightgray", "1" = "black")) + 
  scale_shape_manual(values = c("0" = 1, "1" = 16)) +  # Empty circles for 0, solid circles for 1
  scale_x_continuous(breaks = national_results_daily$Order, , labels = national_results_daily$City, position = "top") +
  labs(title = "",
       x = "",
       y = "National Mobilization") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 18),
        # Axis labels intentionally hidden for a cleaner facet layout
        axis.text.y = element_text(size = 16),
        axis.text.x = element_blank(),  
        axis.ticks.x = element_blank(),  
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)) +
  coord_flip()

national_mobilization

title_text4 <- "FIGURE 4. Instant Mobilization Effects at Local, Regional, and National Levels"
title_grob4 <- grobTree(
  textGrob(title_text4, 
           gp = gpar(fontsize = 24, fontface = "bold"), y = -0.6)
)

# Notes (bottom of figure)
note_text4 <- "\nThis plot presents the instant mobilization effects at all three levels. The size of each point indicates the scale of the local outbreak. Solid points \nrepresent outbreaks that spread to other cities, while hollow points represent those that did not. Points colored in black indicate the statistical \nsignificance of each mobilization effect. The left side indicates the date the first infection was detected, and the right side indicates the city \nwhere the outbreak occurred."
note_grob4 <- textGrob(note_text4, 
                       gp = gpar(fontsize = 16), 
                       x = 0.1, hjust = 0)

figure4 <- arrangeGrob(
  local_mobilization,
  regional_mobilization,
  national_mobilization,
  ncol = 3,
  widths = c(1.08, 0.84, 1.08) 
)

figure4
ggsave(filename = "plots/FIGURE4.png", plot = figure4, width = 16.5, height = 9.2, units = "in", dpi = 200)

# Regional mobilization (lagged)
# Regional mobilization (lagged 7 days)
process_data_regional_lag7 <- function(test_list, shengjifabu) {
  all_results <- data.frame() 
  
  for (i in 1:nrow(test_list)) {
    First_Infected <- as.Date(test_list$First_Infected[i])
    
    City <- test_list$City[i]
    Scale <- test_list$Scale[i]
    Regional_spreading <- test_list$Regional_spreading[i]
    National_spreading <- test_list$National_spreading[i]
    
    start_date <- First_Infected %m-% months(12)
    end_date <- First_Infected %m+% months(12)
    
    peripheral_filter <- unlist(strsplit(test_list$Peripheral[i], ","))
    
    rd_data <- shengjifabu |>
      filter(province %in% peripheral_filter) |>
      filter(date >= start_date & date <= end_date)
    
    rd_data$difference <- as.integer(rd_data$date - First_Infected)
    rd_data$week <- NA
    
    rd_data$others <- rd_data$total_count - rd_data$daily_grid
    
    daily_total_mean <- mean(rd_data$total_count, na.rm = TRUE)
    
    rd_control <- rdrobust(rd_data$daily_grid,
                           rd_data$difference,
                           covs = rd_data$others,
                           p = 1,
                           kernel = "triangular",
                           bwselect = "mserd",
                           c = 7)
    
    conventional_coef <- rd_control[["coef"]]["Conventional", "Coeff"] / daily_total_mean
    conventional_ci_lower <- rd_control[["ci"]]["Conventional", "CI Lower"] / daily_total_mean
    conventional_ci_upper <- rd_control[["ci"]]["Conventional", "CI Upper"] / daily_total_mean
    
    rd_results <- data.frame(
      City = City,
      Scale = Scale,
      Regional_spreading = Regional_spreading,
      National_spreading = National_spreading,
      First_Infected = First_Infected,
      Coefficient = conventional_coef,
      CI_Lower = conventional_ci_lower,
      CI_Upper = conventional_ci_upper
    )
    
    all_results <- bind_rows(all_results, rd_results)
  }
  
  return(all_results)
}

regional_results_daily_lag7 <- process_data_regional_lag7(test_list, shengjifabu)

print(regional_results_daily_lag7)

save(regional_results_daily_lag7, file = "generated_data/dynamic_RDD_results/regional_results_daily_lag7.RData")

# Regional mobilization (lagged 14 days)
process_data_regional_lag14 <- function(test_list, shengjifabu) {
  all_results <- data.frame() 
  
  for (i in 1:nrow(test_list)) {
    First_Infected <- as.Date(test_list$First_Infected[i])
    
    City <- test_list$City[i]
    Scale <- test_list$Scale[i]
    Regional_spreading <- test_list$Regional_spreading[i]
    National_spreading <- test_list$National_spreading[i]
    
    start_date <- First_Infected %m-% months(12)
    end_date <- First_Infected %m+% months(12)
    
    peripheral_filter <- unlist(strsplit(test_list$Peripheral[i], ","))
    
    rd_data <- shengjifabu |>
      filter(province %in% peripheral_filter) |>
      filter(date >= start_date & date <= end_date)
    
    rd_data$difference <- as.integer(rd_data$date - First_Infected)
    rd_data$week <- NA
    
    rd_data$others <- rd_data$total_count - rd_data$daily_grid
    
    daily_total_mean <- mean(rd_data$total_count, na.rm = TRUE)
    
    rd_control <- rdrobust(rd_data$daily_grid,
                           rd_data$difference,
                           covs = rd_data$others,
                           p = 1,
                           kernel = "triangular",
                           bwselect = "mserd",
                           c = 14)
    
    conventional_coef <- rd_control[["coef"]]["Conventional", "Coeff"] / daily_total_mean
    conventional_ci_lower <- rd_control[["ci"]]["Conventional", "CI Lower"] / daily_total_mean
    conventional_ci_upper <- rd_control[["ci"]]["Conventional", "CI Upper"] / daily_total_mean
    
    rd_results <- data.frame(
      City = City,
      Scale = Scale,
      Regional_spreading = Regional_spreading,
      National_spreading = National_spreading,
      First_Infected = First_Infected,
      Coefficient = conventional_coef,
      CI_Lower = conventional_ci_lower,
      CI_Upper = conventional_ci_upper
    )
    
    all_results <- bind_rows(all_results, rd_results)
  }
  
  return(all_results)
}

regional_results_daily_lag14 <- process_data_regional_lag14(test_list, shengjifabu)

print(regional_results_daily_lag14)

save(regional_results_daily_lag14, file = "generated_data/dynamic_RDD_results/regional_results_daily_lag14.RData")

# Regional mobilization (lagged 21 days)
process_data_regional_lag21 <- function(test_list, shengjifabu) {
  all_results <- data.frame() 
  
  for (i in 1:nrow(test_list)) {
    First_Infected <- as.Date(test_list$First_Infected[i])
    
    City <- test_list$City[i]
    Scale <- test_list$Scale[i]
    Regional_spreading <- test_list$Regional_spreading[i]
    National_spreading <- test_list$National_spreading[i]
    
    start_date <- First_Infected %m-% months(12)
    end_date <- First_Infected %m+% months(12)
    
    peripheral_filter <- unlist(strsplit(test_list$Peripheral[i], ","))
    
    rd_data <- shengjifabu |>
      filter(province %in% peripheral_filter) |>
      filter(date >= start_date & date <= end_date)
    
    rd_data$difference <- as.integer(rd_data$date - First_Infected)
    rd_data$week <- NA
    
    rd_data$others <- rd_data$total_count - rd_data$daily_grid
    
    daily_total_mean <- mean(rd_data$total_count, na.rm = TRUE)
    
    rd_control <- rdrobust(rd_data$daily_grid,
                           rd_data$difference,
                           covs = rd_data$others,
                           p = 1,
                           kernel = "triangular",
                           bwselect = "mserd",
                           c = 21)
    
    conventional_coef <- rd_control[["coef"]]["Conventional", "Coeff"] / daily_total_mean
    conventional_ci_lower <- rd_control[["ci"]]["Conventional", "CI Lower"] / daily_total_mean
    conventional_ci_upper <- rd_control[["ci"]]["Conventional", "CI Upper"] / daily_total_mean
    
    rd_results <- data.frame(
      City = City,
      Scale = Scale,
      Regional_spreading = Regional_spreading,
      National_spreading = National_spreading,
      First_Infected = First_Infected,
      Coefficient = conventional_coef,
      CI_Lower = conventional_ci_lower,
      CI_Upper = conventional_ci_upper
    )
    
    all_results <- bind_rows(all_results, rd_results)
  }
  
  return(all_results)
}

regional_results_daily_lag21 <- process_data_regional_lag21(test_list, shengjifabu)

print(regional_results_daily_lag21)

save(regional_results_daily_lag21, file = "generated_data/dynamic_RDD_results/regional_results_daily_lag21.RData")

# Plot regional mobilization (daily, lagged 7 days)
regional_results_daily_lag7 <- regional_results_daily_lag7 |>
  arrange(desc(First_Infected)) |>
  mutate(Order = row_number())

regional_results_daily_lag7$First_Infected <- format(as.Date(regional_results_daily_lag7$First_Infected), "%d %b, %Y")

regional_results_daily_lag7 <- regional_results_daily_lag7 |>
  mutate(Significance = ifelse(CI_Lower > 0 & CI_Upper > 0, 1, 0))

regional_mobilization_lag7 <- ggplot(regional_results_daily_lag7, aes(x = Order, y = Coefficient)) +
  geom_point(aes(size = Scale * 2, color = as.factor(Significance), 
                 fill = as.factor(Significance), shape = as.factor(Regional_spreading)), 
             stroke = 2) +
  geom_errorbar(aes(ymin = CI_Lower, ymax = CI_Upper, color = as.factor(Significance)), 
                width = 0.4, linewidth = 0.8, linetype = "solid") +
  geom_hline(yintercept = 0, color = "#0033FF", linewidth = 1, linetype = "dashed") +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("0" = "lightgray", "1" = "black")) +
  scale_fill_manual(values = c("0" = "lightgray", "1" = "black")) + 
  scale_shape_manual(values = c("0" = 1, "1" = 16)) +  
  scale_x_continuous(breaks = regional_results_daily_lag7$Order, labels = regional_results_daily_lag7$First_Infected) +
  labs(title = "",
       x = "",
       y = "Regional Mobilization, Lagged 7 Days") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 18),
        # Axis labels intentionally hidden for a cleaner facet layout
        axis.text.y = element_text(size = 16),
        axis.text.x = element_blank(),  
        axis.ticks.x = element_blank(),  
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)) +
  coord_flip()

print(regional_mobilization_lag7)

# Plot regional mobilization (daily, lagged 14 days)
regional_results_daily_lag14 <- regional_results_daily_lag14 |>
  arrange(desc(First_Infected)) |>
  mutate(Order = row_number())

regional_results_daily_lag14$First_Infected <- format(as.Date(regional_results_daily_lag14$First_Infected), "%d %b, %Y")

regional_results_daily_lag14 <- regional_results_daily_lag14 |>
  mutate(Significance = ifelse(CI_Lower > 0 & CI_Upper > 0, 1, 0))

regional_mobilization_lag14 <- ggplot(regional_results_daily_lag14, aes(x = Order, y = Coefficient)) +
  geom_point(aes(size = Scale * 2, color = as.factor(Significance), 
                 fill = as.factor(Significance), shape = as.factor(Regional_spreading)), 
             stroke = 2) +
  geom_errorbar(aes(ymin = CI_Lower, ymax = CI_Upper, color = as.factor(Significance)), 
                width = 0.4, linewidth = 0.8, linetype = "solid") +
  geom_hline(yintercept = 0, color = "#0033FF", linewidth = 1, linetype = "dashed") +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("0" = "lightgray", "1" = "black")) +
  scale_fill_manual(values = c("0" = "lightgray", "1" = "black")) + 
  scale_shape_manual(values = c("0" = 1, "1" = 16)) +  
  scale_x_continuous(breaks = regional_results_daily_lag14$Order, labels = regional_results_daily_lag14$City) +
  labs(title = "",
       x = "",
       y = "Regional Mobilization, Lagged 14 Days") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 18),
        # Axis labels intentionally hidden for a cleaner facet layout
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.x = element_blank(),  
        axis.ticks.x = element_blank(),  
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)) +
  coord_flip()

print(regional_mobilization_lag14)

# Plot regional mobilization (daily, lagged 21 days)
regional_results_daily_lag21 <- regional_results_daily_lag21 |>
  arrange(desc(First_Infected)) |>
  mutate(Order = row_number())

regional_results_daily_lag21$First_Infected <- format(as.Date(regional_results_daily_lag21$First_Infected), "%d %b, %Y")

regional_results_daily_lag21 <- regional_results_daily_lag21 |>
  mutate(Significance = ifelse(CI_Lower > 0 & CI_Upper > 0, 1, 0))

regional_mobilization_lag21 <- ggplot(regional_results_daily_lag21, aes(x = Order, y = Coefficient)) +
  geom_point(aes(size = Scale * 2, color = as.factor(Significance), 
                 fill = as.factor(Significance), shape = as.factor(Regional_spreading)), 
             stroke = 2) +
  geom_errorbar(aes(ymin = CI_Lower, ymax = CI_Upper, color = as.factor(Significance)), 
                width = 0.4, linewidth = 0.8, linetype = "solid") +
  geom_hline(yintercept = 0, color = "#0033FF", linewidth = 1, linetype = "dashed") +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("0" = "lightgray", "1" = "black")) +
  scale_fill_manual(values = c("0" = "lightgray", "1" = "black")) + 
  scale_shape_manual(values = c("0" = 1, "1" = 16)) +  # Empty circles for 0, solid circles for 1
  scale_x_continuous(breaks = regional_results_daily_lag21$Order, labels = regional_results_daily_lag21$City, position = "top") +
  labs(title = "",
       x = "",
       y = "Regional Mobilization, Lagged 21 Days") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 18),
        # Axis labels intentionally hidden for a cleaner facet layout
        axis.text.y = element_text(size = 16),
        axis.text.x = element_blank(),  
        axis.ticks.x = element_blank(),  
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)) +
  coord_flip()

regional_mobilization_lag21


title_text5 <- "FIGURE 5. Lagged Mobilization Effects at the Regional Level"
title_grob5 <- grobTree(
  textGrob(title_text5, 
           gp = gpar(fontsize = 24, fontface = "bold"), y = -0.6)
)

# Notes (bottom of figure)
note_text5 <- "\nThis plot presents the lagged mobilization effects at the regional level. The size of each point indicates the scale of the local outbreak. Solid \npoints represent outbreaks that spread to other cities, while hollow points represent those that did not. Points colored in black indicate the \nstatistical significance of each mobilization effect. The left side indicates the date the first infection was detected, and the right side indicates the \ncity where the outbreak occurred."
note_grob5 <- textGrob(note_text5, 
                       gp = gpar(fontsize = 16), 
                       x = 0.1, hjust = 0.0)


figure5_top <- arrangeGrob(
  regional_mobilization_lag7,
  regional_mobilization_lag14,
  regional_mobilization_lag21,
  ncol = 3,
  # Title and notes are added separately if needed
  widths = c(1.08, 0.84, 1.08) 
)

ggsave(filename = "plots/FIGURE5_top.png", plot = figure5_top, width = 16.5, height = 9.2, units = "in", dpi = 200)

# National mobilization (lagged)
# National mobilization (daily, lagged 7 days)
process_data_national_lag7 <- function(test_list, shengjifabu) {
  all_results <- vector("list", nrow(test_list))
  
  for (i in seq_len(nrow(test_list))) {
    First_Infected <- as.Date(test_list$First_Infected[i])
    City                <- test_list$City[i]
    Scale               <- test_list$Scale[i]
    Regional_spreading  <- test_list$Regional_spreading[i]
    National_spreading  <- test_list$National_spreading[i]
    
    start_date <- First_Infected %m-% months(12)
    end_date   <- First_Infected %m+% months(12)
    
    nl_raw <- test_list$National_level[i]
    national <- if (is.na(nl_raw) || nl_raw == "") character(0) else {
      trimws(unlist(strsplit(nl_raw, ",")))
    }
    
    rd_data <- shengjifabu |>
      mutate(date = as.Date(date)) |>
      filter(!(province %in% national)) |>
      filter(date >= start_date & date <= end_date) |>
      group_by(date) |>
      summarise(
        total_count = sum(total_count, na.rm = TRUE),
        daily_grid  = sum(daily_grid,  na.rm = TRUE),
        .groups = "drop"
      ) |>
      complete(date = seq.Date(start_date, end_date, by = "day"),
               fill = list(total_count = 0L, daily_grid = 0L)) |>
      arrange(date) |>
      mutate(
        difference = as.integer(date - First_Infected),   
        others     = pmax(total_count - daily_grid, 0L)   
      )
    
    daily_total_mean <- mean(rd_data$total_count, na.rm = TRUE)
    
    if (is.na(daily_total_mean) || daily_total_mean == 0) {
      coef <- NA_real_; ci_l <- NA_real_; ci_u <- NA_real_
    } else {
      rd_control <- rdrobust(
        y = rd_data$daily_grid,
        x = rd_data$difference,
        covs = rd_data$others,
        p = 1,
        kernel = "triangular",
        bwselect = "mserd",
        c = 7
      )
      coef <- rd_control[["coef"]]["Conventional", "Coeff"]  / daily_total_mean
      ci_l <- rd_control[["ci"]]["Conventional", "CI Lower"] / daily_total_mean
      ci_u <- rd_control[["ci"]]["Conventional", "CI Upper"] / daily_total_mean
    }
    
    all_results[[i]] <- data.frame(
      City = City,
      Scale = Scale,
      Regional_spreading = Regional_spreading,
      National_spreading = National_spreading,
      First_Infected = First_Infected,
      Coefficient = coef,
      CI_Lower = ci_l,
      CI_Upper = ci_u,
      stringsAsFactors = FALSE
    )
  }
  
  dplyr::bind_rows(all_results)
}

national_results_daily_lag7 <- process_data_national_lag7(test_list, shengjifabu)

print(national_results_daily_lag7)

save(national_results_daily_lag7, file = "generated_data/dynamic_RDD_results/national_results_daily_lag7.RData")

# National mobilization (daily, lagged 14 days)
process_data_national_lag14 <- function(test_list, shengjifabu) {
  all_results <- vector("list", nrow(test_list))
  
  for (i in seq_len(nrow(test_list))) {
    First_Infected <- as.Date(test_list$First_Infected[i])
    City                <- test_list$City[i]
    Scale               <- test_list$Scale[i]
    Regional_spreading  <- test_list$Regional_spreading[i]
    National_spreading  <- test_list$National_spreading[i]
    
    start_date <- First_Infected %m-% months(12)
    end_date   <- First_Infected %m+% months(12)
    
    nl_raw <- test_list$National_level[i]
    national <- if (is.na(nl_raw) || nl_raw == "") character(0) else {
      trimws(unlist(strsplit(nl_raw, ",")))
    }
    
    rd_data <- shengjifabu |>
      mutate(date = as.Date(date)) |>
      filter(!(province %in% national)) |>
      filter(date >= start_date & date <= end_date) |>
      group_by(date) |>
      summarise(
        total_count = sum(total_count, na.rm = TRUE),
        daily_grid  = sum(daily_grid,  na.rm = TRUE),
        .groups = "drop"
      ) |>
      complete(date = seq.Date(start_date, end_date, by = "day"),
               fill = list(total_count = 0L, daily_grid = 0L)) |>
      arrange(date) |>
      mutate(
        difference = as.integer(date - First_Infected),   
        others     = pmax(total_count - daily_grid, 0L)   
      )
    
    daily_total_mean <- mean(rd_data$total_count, na.rm = TRUE)
    
    if (is.na(daily_total_mean) || daily_total_mean == 0) {
      coef <- NA_real_; ci_l <- NA_real_; ci_u <- NA_real_
    } else {
      rd_control <- rdrobust(
        y = rd_data$daily_grid,
        x = rd_data$difference,
        covs = rd_data$others,
        p = 1,
        kernel = "triangular",
        bwselect = "mserd",
        c = 14
      )
      coef <- rd_control[["coef"]]["Conventional", "Coeff"]  / daily_total_mean
      ci_l <- rd_control[["ci"]]["Conventional", "CI Lower"] / daily_total_mean
      ci_u <- rd_control[["ci"]]["Conventional", "CI Upper"] / daily_total_mean
    }
    
    all_results[[i]] <- data.frame(
      City = City,
      Scale = Scale,
      Regional_spreading = Regional_spreading,
      National_spreading = National_spreading,
      First_Infected = First_Infected,
      Coefficient = coef,
      CI_Lower = ci_l,
      CI_Upper = ci_u,
      stringsAsFactors = FALSE
    )
  }
  
  dplyr::bind_rows(all_results)
}

national_results_daily_lag14 <- process_data_national_lag14(test_list, shengjifabu)

print(national_results_daily_lag14)

save(national_results_daily_lag14, file = "generated_data/dynamic_RDD_results/national_results_daily_lag14.RData")

# National mobilization (daily, lagged 21 days)
process_data_national_lag21 <- function(test_list, shengjifabu) {
  all_results <- vector("list", nrow(test_list))
  
  for (i in seq_len(nrow(test_list))) {
    First_Infected <- as.Date(test_list$First_Infected[i])
    City                <- test_list$City[i]
    Scale               <- test_list$Scale[i]
    Regional_spreading  <- test_list$Regional_spreading[i]
    National_spreading  <- test_list$National_spreading[i]
    
    start_date <- First_Infected %m-% months(12)
    end_date   <- First_Infected %m+% months(12)
    
    nl_raw <- test_list$National_level[i]
    national <- if (is.na(nl_raw) || nl_raw == "") character(0) else {
      trimws(unlist(strsplit(nl_raw, ",")))
    }
    
    rd_data <- shengjifabu |>
      mutate(date = as.Date(date)) |>
      filter(!(province %in% national)) |>
      filter(date >= start_date & date <= end_date) |>
      group_by(date) |>
      summarise(
        total_count = sum(total_count, na.rm = TRUE),
        daily_grid  = sum(daily_grid,  na.rm = TRUE),
        .groups = "drop"
      ) |>
      complete(date = seq.Date(start_date, end_date, by = "day"),
               fill = list(total_count = 0L, daily_grid = 0L)) |>
      arrange(date) |>
      mutate(
        difference = as.integer(date - First_Infected),   
        others     = pmax(total_count - daily_grid, 0L)   
      )
    
    daily_total_mean <- mean(rd_data$total_count, na.rm = TRUE)
    
    if (is.na(daily_total_mean) || daily_total_mean == 0) {
      coef <- NA_real_; ci_l <- NA_real_; ci_u <- NA_real_
    } else {
      rd_control <- rdrobust(
        y = rd_data$daily_grid,
        x = rd_data$difference,
        covs = rd_data$others,
        p = 1,
        kernel = "triangular",
        bwselect = "mserd",
        c = 21
      )
      coef <- rd_control[["coef"]]["Conventional", "Coeff"]  / daily_total_mean
      ci_l <- rd_control[["ci"]]["Conventional", "CI Lower"] / daily_total_mean
      ci_u <- rd_control[["ci"]]["Conventional", "CI Upper"] / daily_total_mean
    }
    
    all_results[[i]] <- data.frame(
      City = City,
      Scale = Scale,
      Regional_spreading = Regional_spreading,
      National_spreading = National_spreading,
      First_Infected = First_Infected,
      Coefficient = coef,
      CI_Lower = ci_l,
      CI_Upper = ci_u,
      stringsAsFactors = FALSE
    )
  }
  
  dplyr::bind_rows(all_results)
}

national_results_daily_lag21 <- process_data_national_lag21(test_list, shengjifabu)

print(national_results_daily_lag21)

save(national_results_daily_lag21, file = "generated_data/dynamic_RDD_results/national_results_daily_lag21.RData")

# Plot national mobilization (daily, lagged 7 days)
national_results_daily_lag7 <- national_results_daily_lag7 |>
  arrange(desc(First_Infected)) |>
  mutate(Order = row_number())

national_results_daily_lag7$First_Infected <- format(as.Date(national_results_daily_lag7$First_Infected), "%d %b, %Y")

national_results_daily_lag7 <- national_results_daily_lag7 |>
  mutate(Significance = ifelse(CI_Lower > 0 & CI_Upper > 0, 1, 0))

national_mobilization_lag7 <- ggplot(national_results_daily_lag7, aes(x = Order, y = Coefficient)) +
  geom_point(aes(size = Scale * 2, color = as.factor(Significance), 
                 fill = as.factor(Significance), shape = as.factor(Regional_spreading)), 
             stroke = 2) +
  geom_errorbar(aes(ymin = CI_Lower, ymax = CI_Upper, color = as.factor(Significance)), 
                width = 0.4, linewidth = 0.8, linetype = "solid") +
  geom_hline(yintercept = 0, color = "#0033FF", linewidth = 1, linetype = "dashed") +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("0" = "lightgray", "1" = "black")) +
  scale_fill_manual(values = c("0" = "lightgray", "1" = "black")) + 
  scale_shape_manual(values = c("0" = 1, "1" = 16)) +  
  scale_x_continuous(breaks = national_results_daily_lag7$Order, labels = national_results_daily_lag7$First_Infected) +
  labs(title = "",
       x = "",
       y = "National Mobilization, Lagged 7 Days") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 18),
        axis.text.y = element_text(size = 16),
        axis.text.x = element_blank(),  
        axis.ticks.x = element_blank(),  
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)) +
  coord_flip()

print(national_mobilization_lag7)

# Plot national mobilization (daily, lagged 14 days)
national_results_daily_lag14 <- national_results_daily_lag14 |>
  arrange(desc(First_Infected)) |>
  mutate(Order = row_number())

national_results_daily_lag14$First_Infected <- format(as.Date(national_results_daily_lag14$First_Infected), "%d %b, %Y")

national_results_daily_lag14 <- national_results_daily_lag14 |>
  mutate(Significance = ifelse(CI_Lower > 0 & CI_Upper > 0, 1, 0))

national_mobilization_lag14 <- ggplot(national_results_daily_lag14, aes(x = Order, y = Coefficient)) +
  geom_point(aes(size = Scale * 2, color = as.factor(Significance), 
                 fill = as.factor(Significance), shape = as.factor(Regional_spreading)), 
             stroke = 2) +
  geom_errorbar(aes(ymin = CI_Lower, ymax = CI_Upper, color = as.factor(Significance)), 
                width = 0.4, linewidth = 0.8, linetype = "solid") +
  geom_hline(yintercept = 0, color = "#0033FF", linewidth = 1, linetype = "dashed") +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("0" = "lightgray", "1" = "black")) +
  scale_fill_manual(values = c("0" = "lightgray", "1" = "black")) + 
  scale_shape_manual(values = c("0" = 1, "1" = 16)) +  
  scale_x_continuous(breaks = national_results_daily_lag14$Order, labels = national_results_daily_lag14$City) +
  labs(title = "",
       x = "",
       y = "National Mobilization, Lagged 14 Days") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 18),
        # Axis labels intentionally hidden for a cleaner facet layout
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.x = element_blank(),  
        axis.ticks.x = element_blank(),  
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)) +
  coord_flip()

print(national_mobilization_lag14)

# Plot national mobilization (daily, lagged 21 days)
national_results_daily_lag21 <- national_results_daily_lag21 |>
  arrange(desc(First_Infected)) |>
  mutate(Order = row_number())

national_results_daily_lag21$First_Infected <- format(as.Date(national_results_daily_lag21$First_Infected), "%d %b, %Y")

national_results_daily_lag21 <- national_results_daily_lag21 |>
  mutate(Significance = ifelse(CI_Lower > 0 & CI_Upper > 0, 1, 0))

national_mobilization_lag21 <- ggplot(national_results_daily_lag21, aes(x = Order, y = Coefficient)) +
  geom_point(aes(size = Scale * 2, color = as.factor(Significance), 
                 fill = as.factor(Significance), shape = as.factor(Regional_spreading)), 
             stroke = 2) +
  geom_errorbar(aes(ymin = CI_Lower, ymax = CI_Upper, color = as.factor(Significance)), 
                width = 0.4, linewidth = 0.8, linetype = "solid") +
  geom_hline(yintercept = 0, color = "#0033FF", linewidth = 1, linetype = "dashed") +
  scale_size_continuous(range = c(2, 10)) +
  scale_color_manual(values = c("0" = "lightgray", "1" = "black")) +
  scale_fill_manual(values = c("0" = "lightgray", "1" = "black")) + 
  scale_shape_manual(values = c("0" = 1, "1" = 16)) +  # Empty circles for 0, solid circles for 1
  scale_x_continuous(breaks = national_results_daily_lag21$Order, labels = national_results_daily_lag21$City, position = "top") +
  labs(title = "",
       x = "",
       y = "National Mobilization, Lagged 21 Days") +
  theme_bw() +
  theme(legend.position = "none",
        text = element_text(size = 18),
        # Axis labels intentionally hidden for a cleaner facet layout
        axis.text.y = element_text(size = 16),
        axis.text.x = element_blank(),  
        axis.ticks.x = element_blank(),  
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, linewidth = 1)) +
  coord_flip()

national_mobilization_lag21


title_text6 <- "FIGURE A10-2. Lagged Mobilization Effect at the National Level"
title_grob6 <- grobTree(
  textGrob(title_text6, 
           gp = gpar(fontsize = 24, fontface = "bold"), y = -0.6)
)

# Notes (bottom of figure)
note_text6 <- "\nThis plot presents the lagged mobilization effects at the national level. The size of each point indicates the scale of the local outbreak. Solid \npoints represent outbreaks that spread to other cities, while hollow points represent those that did not. Points colored in black indicate the \nstatistical significance of each mobilization effect. The left side indicates the date the first infection was detected, and the right side indicates the \ncity where the outbreak occurred."
note_grob6 <- textGrob(note_text6, 
                       gp = gpar(fontsize = 16), 
                       x = 0.1, hjust = 0.0)


figure5_bottom <- arrangeGrob(
  national_mobilization_lag7,
  national_mobilization_lag14,
  national_mobilization_lag21,
  ncol = 3,
  widths = c(1.08, 0.84, 1.08) 
)

ggsave(filename = "plots/figure5_bottom.png", plot = figure5_bottom, width = 16.5, height = 9.2, units = "in", dpi = 200)

# Combine the top and bottom panels into a single figure (Figure 5, revised)
figure_combined5_top_and_bottom <- grid.arrange(figure5_top, figure5_bottom, ncol = 1)
ggsave(filename = "plots/figure_combined5_top_and_bottom.png", plot = figure_combined5_top_and_bottom, width = 16.5, height = 18.4, units = "in", dpi = 200)
